import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
# Replace the path below with the folder that holds the Covid-19 CSV files.
# os.listdir() returns entries in arbitrary order, so the listing is sorted to
# make any positional indexing into `files` deterministic across machines.
files = sorted(os.listdir(r'C:\Users\lucas\general\cursos\09-python\01_data_analyst_python\04-project_covid_19_stats\Covid-19'))
files
['country_wise_latest.csv', 'covid_19_clean_complete.csv', 'day_wise.csv', 'full_grouped.csv', 'usa_country_wise.csv', 'worldometer_data.csv']
def read_data(path, filename):
    """Load one CSV file from a directory into a DataFrame.

    Args:
        path: Directory that contains the file.
        filename: Name of the CSV file inside ``path``.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv``.
    """
    # os.path.join adds the separator only when it is missing, so ``path``
    # may be given with or without a trailing slash.
    return pd.read_csv(os.path.join(path, filename))
# Folder that holds the dataset CSV files (replace with your own location).
path = "C:/Users/lucas/general/cursos/09-python/01_data_analyst_python/04-project_covid_19_stats/Covid-19"

# Load the Worldometer table and preview its first three rows.
world_data = read_data(path, filename='worldometer_data.csv')
world_data.head(n=3)
| Country/Region | Continent | Population | TotalCases | NewCases | TotalDeaths | NewDeaths | TotalRecovered | NewRecovered | ActiveCases | Serious,Critical | Tot Cases/1M pop | Deaths/1M pop | TotalTests | Tests/1M pop | WHO Region | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | USA | North America | 3.311981e+08 | 5032179 | NaN | 162804.0 | NaN | 2576668.0 | NaN | 2292707.0 | 18296.0 | 15194.0 | 492.0 | 63139605.0 | 190640.0 | Americas |
| 1 | Brazil | South America | 2.127107e+08 | 2917562 | NaN | 98644.0 | NaN | 2047660.0 | NaN | 771258.0 | 8318.0 | 13716.0 | 464.0 | 13206188.0 | 62085.0 | Americas |
| 2 | India | Asia | 1.381345e+09 | 2025409 | NaN | 41638.0 | NaN | 1377384.0 | NaN | 606387.0 | 8944.0 | 1466.0 | 30.0 | 22149351.0 | 16035.0 | South-EastAsia |
# Load each dataset by its file name rather than by position in a directory
# listing: os.listdir() gives no ordering guarantee, so a positional index
# can silently point at a different CSV on another machine.
day_wise = read_data(path, 'day_wise.csv')
group_data = read_data(path, 'full_grouped.csv')
usa_data = read_data(path, 'usa_country_wise.csv')
province_data = read_data(path, 'covid_19_clean_complete.csv')

# (rows, columns) of the province-level table.
province_data.shape
(49068, 10)
# Number of fully duplicated rows in each frame, gathered into one Series so
# that every count is visible (as separate bare expressions, a notebook cell
# only displays the last one).
duplicate_counts = pd.Series(
    {
        'day_wise': day_wise.duplicated().sum(),
        'group_data': group_data.duplicated().sum(),
        'usa_data': usa_data.duplicated().sum(),
        'province_data': province_data.duplicated().sum(),
    },
    name='duplicated_rows',
)
duplicate_counts
0
# Missing-value count per column for each frame, concatenated into a single
# Series whose outer index level is the frame name, so that no frame's result
# is discarded (a notebook cell only displays its last bare expression).
null_counts = pd.concat(
    {
        'day_wise': day_wise.isnull().sum(),
        'group_data': group_data.isnull().sum(),
        'usa_data': usa_data.isnull().sum(),
        'province_data': province_data.isnull().sum(),
    }
)
null_counts
Province/State 34404 Country/Region 0 Lat 0 Long 0 Date 0 Confirmed 0 Deaths 0 Recovered 0 Active 0 WHO Region 0 dtype: int64
# Show the column labels of the Worldometer table.
world_data.columns
Index(['Country/Region', 'Continent', 'Population', 'TotalCases', 'NewCases',
'TotalDeaths', 'NewDeaths', 'TotalRecovered', 'NewRecovered',
'ActiveCases', 'Serious,Critical', 'Tot Cases/1M pop', 'Deaths/1M pop',
'TotalTests', 'Tests/1M pop', 'WHO Region'],
dtype='object')
import plotly.express as px
# Draw one treemap per metric, sized by that metric, using the first 20 rows
# of the Worldometer table.
columns = ['TotalCases', 'TotalDeaths', 'TotalRecovered', 'ActiveCases']
first_twenty = world_data.iloc[0:20]
treemap_title = 'Treemap of different countries w.r.t to their {}'
for metric in columns:
    fig = px.treemap(
        first_twenty,
        path=['Country/Region'],
        values=metric,
        title=treemap_title.format(metric),
    )
    fig.show()

# Preview the first three rows of the day-wise table.
day_wise.head(n=3)
| Date | Confirmed | Deaths | Recovered | Active | New cases | New deaths | New recovered | Deaths / 100 Cases | Recovered / 100 Cases | Deaths / 100 Recovered | No. of countries | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2020-01-22 | 555 | 17 | 28 | 510 | 0 | 0 | 0 | 3.06 | 5.05 | 60.71 | 6 |
| 1 | 2020-01-23 | 654 | 18 | 30 | 606 | 99 | 1 | 2 | 2.75 | 4.59 | 60.00 | 8 |
| 2 | 2020-01-24 | 941 | 26 | 36 | 879 | 287 | 8 | 6 | 2.76 | 3.83 | 72.22 | 9 |
# Show the column labels of the day-wise table.
day_wise.columns
Index(['Date', 'Confirmed', 'Deaths', 'Recovered', 'Active', 'New cases',
'New deaths', 'New recovered', 'Deaths / 100 Cases',
'Recovered / 100 Cases', 'Deaths / 100 Recovered', 'No. of countries'],
dtype='object')
# Line chart of the four running totals against the Date column.
trend_columns = ['Confirmed', 'Deaths', 'Recovered', 'Active']
px.line(
    day_wise,
    x='Date',
    y=trend_columns,
    title='COVIDcases w.r. t to date',
    template='plotly_dark',
)

# Preview the first three rows of the Worldometer table.
world_data.head(n=3)
| Country/Region | Continent | Population | TotalCases | NewCases | TotalDeaths | NewDeaths | TotalRecovered | NewRecovered | ActiveCases | Serious,Critical | Tot Cases/1M pop | Deaths/1M pop | TotalTests | Tests/1M pop | WHO Region | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | USA | North America | 3.311981e+08 | 5032179 | NaN | 162804.0 | NaN | 2576668.0 | NaN | 2292707.0 | 18296.0 | 15194.0 | 492.0 | 63139605.0 | 190640.0 | Americas |
| 1 | Brazil | South America | 2.127107e+08 | 2917562 | NaN | 98644.0 | NaN | 2047660.0 | NaN | 771258.0 | 8318.0 | 13716.0 | 464.0 | 13206188.0 | 62085.0 | Americas |
| 2 | India | Asia | 1.381345e+09 | 2025409 | NaN | 41638.0 | NaN | 1377384.0 | NaN | 606387.0 | 8944.0 | 1466.0 | 30.0 | 22149351.0 | 16035.0 | South-EastAsia |
# Population divided by total tests for the first 20 rows of the table.
# The slice is taken once so numerator, denominator and the plotted frame all
# cover exactly the same rows instead of relying on index alignment.
top_twenty = world_data.iloc[0:20]
pop_test_ratio = top_twenty['Population'] / top_twenty['TotalTests']
fig = px.bar(
    top_twenty,
    x='Country/Region',
    y=pop_test_ratio,
    color='Country/Region',
    title='Pop to tests ratio',
)
fig.show()

# Show the column labels of the Worldometer table.
world_data.columns
Index(['Country/Region', 'Continent', 'Population', 'TotalCases', 'NewCases',
'TotalDeaths', 'NewDeaths', 'TotalRecovered', 'NewRecovered',
'ActiveCases', 'Serious,Critical', 'Tot Cases/1M pop', 'Deaths/1M pop',
'TotalTests', 'Tests/1M pop', 'WHO Region'],
dtype='object')
# Bar chart with one series per metric for the first 20 rows of the table.
bar_metrics = ['Serious,Critical', 'TotalDeaths', 'TotalRecovered', 'ActiveCases', 'TotalCases']
px.bar(
    world_data.iloc[0:20],
    x='Country/Region',
    y=bar_metrics,
)
# Rank by TotalCases explicitly instead of relying on the row order of the
# CSV, so the "max confirmed cases" claim in the title holds for any input.
fig = px.bar(world_data.sort_values(by='TotalCases', ascending=False)[0:20],
             x='TotalCases',
             y='Country/Region',
             text='TotalCases',
             color='TotalCases'
             )
fig.update_layout(template='plotly_dark', title='20 countries having max confirmed cases')
fig.show()
# Bar charts (metric on x, country on y) of the 20 highest rows for each
# metric; the table is sorted by the metric itself before the top rows are
# taken. Each entry pairs a metric column with its chart title.
ranked_charts = (
    ('TotalDeaths', '20 countries having max TotalDeaths'),
    ('ActiveCases', '20 countries having max Active Cases'),
    ('TotalRecovered', '20 countries having max Total Recovered Cases'),
)
for metric, chart_title in ranked_charts:
    top_rows = world_data.sort_values(by=metric, ascending=False).iloc[0:20]
    fig = px.bar(top_rows, x=metric, y='Country/Region', text=metric, color=metric)
    fig.update_layout(title=chart_title, template='plotly_dark')
    fig.show()
# One donut-style pie chart per metric for the first 15 rows of the table,
# with slices labelled by country name.
# NOTE(review): the title mentions "who region" but the slice names are
# countries - confirm which wording is intended.
cases = ['TotalCases', 'TotalDeaths', 'TotalRecovered', 'ActiveCases']
first_fifteen = world_data[0:15]
labels = first_fifteen['Country/Region'].values
pie_title = "{} recorded w.r.t to who region of 15 worst affected countries"
for metric in cases:
    fig = px.pie(
        first_fifteen,
        names=labels,
        values=metric,
        title=pie_title.format(metric),
        hole=0.2,
        template="seaborn",
    )
    # Put value, percentage and label inside each slice.
    fig.update_traces(textinfo="value+percent+label", textposition="inside")
    fig.show()
# Row-wise ratio of TotalDeaths to TotalCases.
deaths_to_confirmed = world_data['TotalDeaths'].div(world_data['TotalCases'])
deaths_to_confirmed
0 0.032353
1 0.033810
2 0.020558
3 0.016752
4 0.017845
...
204 0.076923
205 NaN
206 NaN
207 NaN
208 0.100000
Length: 209, dtype: float64
# Bar chart of the deaths/confirmed ratio, one bar per row of the table.
# NOTE(review): every row is plotted although the title says "worst affected
# countries" - confirm whether a top-N slice was intended.
ratio_title = 'Deaths to confirmed ratio of worst affected countries'
fig = px.bar(data_frame=world_data, y=deaths_to_confirmed, x='Country/Region')
fig.update_layout(title=ratio_title, template='plotly_dark')
fig.show()
# Row-wise ratio of TotalDeaths to TotalRecovered, then one bar per row.
deaths_to_recovered = world_data['TotalDeaths'].div(world_data['TotalRecovered'])
ratio_title = 'Deaths to recovered ratio of worst affected countries'
fig = px.bar(data_frame=world_data, y=deaths_to_recovered, x='Country/Region')
fig.update_layout(title=ratio_title, template='plotly_dark')
fig.show()
# Row-wise ratio of TotalDeaths to TotalTests, then one bar per row.
deaths_to_test = world_data['TotalDeaths'].div(world_data['TotalTests'])
ratio_title = 'Deaths to tests ratio of worst affected countries'
fig = px.bar(data_frame=world_data, y=deaths_to_test, x='Country/Region')
fig.update_layout(title=ratio_title, template='plotly_dark')
fig.show()
# Row-wise ratio of serious/critical cases to total deaths.
serious_to_deaths = world_data['Serious,Critical']/world_data['TotalDeaths']
fig = px.bar(world_data,
             x='Country/Region',
             y=serious_to_deaths
             )
# The title names the ratio in the same order as the computation above
# (serious cases divided by deaths), not the inverse.
fig.update_layout(template='plotly_dark', title='Serious to deaths ratio of worst affected countries')
fig.show()